[435c0f]: / Experimental_Data / Reranker / Answer_evaluation / Evaluation / Evaluation of Answer Generation.ipynb

Download this file

821 lines (821 with data), 33.5 kB

{
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "-a306_WSUXk0"
      },
      "source": [
        "# Evaluation of Answer Generation performance with BERTscore\n",
        "\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 1,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "HV93aEaWniBx",
        "outputId": "b5f4d3d3-6ed1-4bcf-8f6f-fba5c3b8743a"
      },
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m105.0/105.0 MB\u001b[0m \u001b[31m15.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25h  Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n",
            "  Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n",
            "  Preparing metadata (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n",
            "  Building wheel for transformers (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n",
            "  Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n",
            "  Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n",
            "  Preparing metadata (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m270.9/270.9 kB\u001b[0m \u001b[31m4.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25h  Building wheel for peft (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n",
            "  Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n",
            "  Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n",
            "  Preparing metadata (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n",
            "  Building wheel for accelerate (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m507.1/507.1 kB\u001b[0m \u001b[31m8.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m115.3/115.3 kB\u001b[0m \u001b[31m14.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m134.8/134.8 kB\u001b[0m \u001b[31m17.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m61.1/61.1 kB\u001b[0m \u001b[31m1.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m85.5/85.5 MB\u001b[0m \u001b[31m19.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m798.0/798.0 kB\u001b[0m \u001b[31m10.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.6/1.6 MB\u001b[0m \u001b[31m38.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m216.6/216.6 kB\u001b[0m \u001b[31m25.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m48.3/48.3 kB\u001b[0m \u001b[31m6.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m49.4/49.4 kB\u001b[0m \u001b[31m6.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m86.0/86.0 kB\u001b[0m \u001b[31m2.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25h  Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.3/1.3 MB\u001b[0m \u001b[31m35.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25h  Building wheel for sentence_transformers (setup.py) ... \u001b[?25l\u001b[?25hdone\n"
          ]
        }
      ],
      "source": [
        "!pip install -q -U bitsandbytes\n",
        "!pip install -q -U git+https://github.com/huggingface/transformers.git\n",
        "!pip install -q -U git+https://github.com/huggingface/peft.git\n",
        "!pip install -q -U git+https://github.com/huggingface/accelerate.git\n",
        "!pip install -q datasets\n",
        "!pip install -q bert_score\n",
        "!pip install -q faiss-gpu\n",
        "!pip install -q langchain\n",
        "!pip install -q sentence_transformers"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 2,
      "metadata": {
        "id": "-xkfr66OepkC"
      },
      "outputs": [],
      "source": [
        "import os\n",
        "import pandas as pd\n",
        "import numpy as np\n",
        "\n",
        "from datasets import Dataset"
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "import json\n",
        "from datasets import load_metric\n",
        "\n",
        "# Load csv dataset\n",
        "def load_dataset_from_csv(file_path):\n",
        "    return pd.read_csv(file_path)\n",
        "\n",
        "# Calculate BERTScore\n",
        "def bertscore_(results_file_path, output_file_path, csv_file_path, bertscore):\n",
        "    with open(results_file_path, 'r') as file:\n",
        "        predictions = json.load(file)\n",
        "\n",
        "    test_dataset = load_dataset_from_csv(csv_file_path)\n",
        "    ref_ans = test_dataset['Answer'].tolist()\n",
        "\n",
        "    results = bertscore.compute(predictions=predictions, references=ref_ans, lang=\"en\", model_type='microsoft/deberta-xlarge-mnli')\n",
        "\n",
        "    with open(output_file_path, 'w') as file:\n",
        "        json.dump(results, file)\n",
        "\n",
        "    print(f\"Results saved to {output_file_path}\")\n",
        "\n"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 17
        },
        "id": "iZFLSZ8dOLGk",
        "outputId": "9ff14328-2b50-4fa6-a932-a3332114484d"
      },
      "execution_count": 23,
      "outputs": [
        {
          "output_type": "display_data",
          "data": {
            "text/plain": [
              "<IPython.core.display.HTML object>"
            ],
            "text/html": [
              "\n",
              "  <style>\n",
              "    pre {\n",
              "        white-space: pre-wrap;\n",
              "    }\n",
              "  </style>\n",
              "  "
            ]
          },
          "metadata": {}
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "test_dataset = \"/content/Test_dataset.csv\"\n",
        "bertscore = load_metric(\"bertscore\")"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 106
        },
        "id": "5Otdn4clQWAX",
        "outputId": "84f63d58-df35-4862-a3fc-9117e103fa08"
      },
      "execution_count": 21,
      "outputs": [
        {
          "output_type": "display_data",
          "data": {
            "text/plain": [
              "<IPython.core.display.HTML object>"
            ],
            "text/html": [
              "\n",
              "  <style>\n",
              "    pre {\n",
              "        white-space: pre-wrap;\n",
              "    }\n",
              "  </style>\n",
              "  "
            ]
          },
          "metadata": {}
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "/usr/local/lib/python3.10/dist-packages/datasets/load.py:752: FutureWarning: The repository for bertscore contains custom code which must be executed to correctly load the metric. You can inspect the repository content at https://raw.githubusercontent.com/huggingface/datasets/2.16.1/metrics/bertscore/bertscore.py\n",
            "You can avoid this message in future by passing the argument `trust_remote_code=True`.\n",
            "Passing `trust_remote_code=True` will be mandatory to load this metric from the next major release of `datasets`.\n",
            "  warnings.warn(\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "results_file_path = '/content/Answer_HyDE_with_reranker.json'\n",
        "output_file_path = '/content/Bertscore_Answer_HyDE_with_reranker.json'\n",
        "\n",
        "bertscore_(results_file_path, output_file_path, test_dataset, bertscore)\n"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 35
        },
        "id": "G3q5c6JG16_l",
        "outputId": "e152298f-dffd-477c-e666-30d93ebfc705"
      },
      "execution_count": 25,
      "outputs": [
        {
          "output_type": "display_data",
          "data": {
            "text/plain": [
              "<IPython.core.display.HTML object>"
            ],
            "text/html": [
              "\n",
              "  <style>\n",
              "    pre {\n",
              "        white-space: pre-wrap;\n",
              "    }\n",
              "  </style>\n",
              "  "
            ]
          },
          "metadata": {}
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Results saved to /content/Bertscore_Answer_HyDE_with_reranker.json\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "results_file_path = '/content/Answer_QA_RAG.json'\n",
        "output_file_path = '/content/Bertscore_Answer_QA_RAG.json'\n",
        "\n",
        "bertscore_(results_file_path, output_file_path, test_dataset, bertscore)"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 35
        },
        "id": "43oCrBvP2b3D",
        "outputId": "8d07e7ae-eb16-4ffa-ed6c-729ae11c8293"
      },
      "execution_count": 26,
      "outputs": [
        {
          "output_type": "display_data",
          "data": {
            "text/plain": [
              "<IPython.core.display.HTML object>"
            ],
            "text/html": [
              "\n",
              "  <style>\n",
              "    pre {\n",
              "        white-space: pre-wrap;\n",
              "    }\n",
              "  </style>\n",
              "  "
            ]
          },
          "metadata": {}
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Results saved to /content/Bertscore_Answer_QA_RAG.json\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "results_file_path = '/content/Answer_Only_question.json'\n",
        "output_file_path = '/content/Bertscore_Answer_Only_question.json'\n",
        "\n",
        "bertscore_(results_file_path, output_file_path, test_dataset, bertscore)"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 35
        },
        "id": "J5zyNLJoyLT3",
        "outputId": "d2598938-13d9-4250-b242-554ee79b702a"
      },
      "execution_count": 27,
      "outputs": [
        {
          "output_type": "display_data",
          "data": {
            "text/plain": [
              "<IPython.core.display.HTML object>"
            ],
            "text/html": [
              "\n",
              "  <style>\n",
              "    pre {\n",
              "        white-space: pre-wrap;\n",
              "    }\n",
              "  </style>\n",
              "  "
            ]
          },
          "metadata": {}
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Results saved to /content/Bertscore_Answer_Only_question.json\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "results_file_path =  '/content/Answer_Multiquery_questions.json'\n",
        "output_file_path = '/content/Bertscore_Answer_Multiquery_questions.json'\n",
        "\n",
        "bertscore_(results_file_path, output_file_path, test_dataset, bertscore)"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 35
        },
        "id": "6TDxxjfDyLQ1",
        "outputId": "632a98a8-b94c-45a3-808d-5505e7f94c5f"
      },
      "execution_count": 28,
      "outputs": [
        {
          "output_type": "display_data",
          "data": {
            "text/plain": [
              "<IPython.core.display.HTML object>"
            ],
            "text/html": [
              "\n",
              "  <style>\n",
              "    pre {\n",
              "        white-space: pre-wrap;\n",
              "    }\n",
              "  </style>\n",
              "  "
            ]
          },
          "metadata": {}
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Results saved to /content/Bertscore_Answer_Multiquery_questions.json\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "results_file_path = '/content/Answer_Only_hypothetical_answer.json'\n",
        "output_file_path = '/content/Bertscore_Answer_Only_hypothetical_answer.json'\n",
        "\n",
        "bertscore_(results_file_path, output_file_path, test_dataset, bertscore)"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 35
        },
        "id": "_YglqHYAyLN-",
        "outputId": "979eaa6a-e0bf-4e8f-b719-cc1878865ad9"
      },
      "execution_count": 29,
      "outputs": [
        {
          "output_type": "display_data",
          "data": {
            "text/plain": [
              "<IPython.core.display.HTML object>"
            ],
            "text/html": [
              "\n",
              "  <style>\n",
              "    pre {\n",
              "        white-space: pre-wrap;\n",
              "    }\n",
              "  </style>\n",
              "  "
            ]
          },
          "metadata": {}
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Results saved to /content/Bertscore_Answer_Only_hypothetical_answer.json\n"
          ]
        }
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "Dx_S_L3U-8Ni"
      },
      "source": [
        "#### Analyze"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 30,
      "metadata": {
        "id": "uTpr6qOCvNsy",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 17
        },
        "outputId": "a8e99a98-1879-483e-c5be-6f39aa5cf5e1"
      },
      "outputs": [
        {
          "output_type": "display_data",
          "data": {
            "text/plain": [
              "<IPython.core.display.HTML object>"
            ],
            "text/html": [
              "\n",
              "  <style>\n",
              "    pre {\n",
              "        white-space: pre-wrap;\n",
              "    }\n",
              "  </style>\n",
              "  "
            ]
          },
          "metadata": {}
        }
      ],
      "source": [
        "import matplotlib.pyplot as plt\n",
        "import seaborn as sns\n",
        "\n",
        "# Function to load JSON data\n",
        "def load_json(filename):\n",
        "    with open(filename, 'r') as file:\n",
        "        return json.load(file)\n",
        "\n",
        "# Load BERTScore results\n",
        "Bertscore_Answer_HyDE_with_reranker = load_json('/content/Bertscore_Answer_HyDE_with_reranker.json')\n",
        "Bertscore_Answer_QA_RAG = load_json('/content/Bertscore_Answer_QA_RAG.json')\n",
        "Bertscore_Answer_Only_question = load_json('/content/Bertscore_Answer_Only_question.json')\n",
        "Bertscore_Answer_Multiquery_questions = load_json('/content/Bertscore_Answer_Multiquery_questions.json')\n",
        "Bertscore_Answer_Only_hypothetical_answer = load_json('/content/Bertscore_Answer_Only_hypothetical_answer.json')"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 31,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 206
        },
        "id": "KFM7TNLR-5pg",
        "outputId": "8f77f9a5-ae56-456d-8128-98fba87ff2f6"
      },
      "outputs": [
        {
          "output_type": "display_data",
          "data": {
            "text/plain": [
              "<IPython.core.display.HTML object>"
            ],
            "text/html": [
              "\n",
              "  <style>\n",
              "    pre {\n",
              "        white-space: pre-wrap;\n",
              "    }\n",
              "  </style>\n",
              "  "
            ]
          },
          "metadata": {}
        },
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "                          precision    recall        f1\n",
              "QA_RAG                     0.550603  0.645380  0.591428\n",
              "Multiquery_questions       0.532219  0.629463  0.572945\n",
              "HyDE_with_reranker         0.539636  0.641390  0.582352\n",
              "Only_question              0.540208  0.635625  0.580522\n",
              "Only_hypothetical_answer   0.538593  0.642369  0.582680"
            ],
            "text/html": [
              "\n",
              "  <div id=\"df-1fafef96-2bc1-4048-ad16-76b2b9e23bf7\" class=\"colab-df-container\">\n",
              "    <div>\n",
              "<style scoped>\n",
              "    .dataframe tbody tr th:only-of-type {\n",
              "        vertical-align: middle;\n",
              "    }\n",
              "\n",
              "    .dataframe tbody tr th {\n",
              "        vertical-align: top;\n",
              "    }\n",
              "\n",
              "    .dataframe thead th {\n",
              "        text-align: right;\n",
              "    }\n",
              "</style>\n",
              "<table border=\"1\" class=\"dataframe\">\n",
              "  <thead>\n",
              "    <tr style=\"text-align: right;\">\n",
              "      <th></th>\n",
              "      <th>precision</th>\n",
              "      <th>recall</th>\n",
              "      <th>f1</th>\n",
              "    </tr>\n",
              "  </thead>\n",
              "  <tbody>\n",
              "    <tr>\n",
              "      <th>QA_RAG</th>\n",
              "      <td>0.550603</td>\n",
              "      <td>0.645380</td>\n",
              "      <td>0.591428</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>Multiquery_questions</th>\n",
              "      <td>0.532219</td>\n",
              "      <td>0.629463</td>\n",
              "      <td>0.572945</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>HyDE_with_reranker</th>\n",
              "      <td>0.539636</td>\n",
              "      <td>0.641390</td>\n",
              "      <td>0.582352</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>Only_question</th>\n",
              "      <td>0.540208</td>\n",
              "      <td>0.635625</td>\n",
              "      <td>0.580522</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>Only_hypothetical_answer</th>\n",
              "      <td>0.538593</td>\n",
              "      <td>0.642369</td>\n",
              "      <td>0.582680</td>\n",
              "    </tr>\n",
              "  </tbody>\n",
              "</table>\n",
              "</div>\n",
              "    <div class=\"colab-df-buttons\">\n",
              "\n",
              "  <div class=\"colab-df-container\">\n",
              "    <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-1fafef96-2bc1-4048-ad16-76b2b9e23bf7')\"\n",
              "            title=\"Convert this dataframe to an interactive table.\"\n",
              "            style=\"display:none;\">\n",
              "\n",
              "  <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n",
              "    <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n",
              "  </svg>\n",
              "    </button>\n",
              "\n",
              "  <style>\n",
              "    .colab-df-container {\n",
              "      display:flex;\n",
              "      gap: 12px;\n",
              "    }\n",
              "\n",
              "    .colab-df-convert {\n",
              "      background-color: #E8F0FE;\n",
              "      border: none;\n",
              "      border-radius: 50%;\n",
              "      cursor: pointer;\n",
              "      display: none;\n",
              "      fill: #1967D2;\n",
              "      height: 32px;\n",
              "      padding: 0 0 0 0;\n",
              "      width: 32px;\n",
              "    }\n",
              "\n",
              "    .colab-df-convert:hover {\n",
              "      background-color: #E2EBFA;\n",
              "      box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
              "      fill: #174EA6;\n",
              "    }\n",
              "\n",
              "    .colab-df-buttons div {\n",
              "      margin-bottom: 4px;\n",
              "    }\n",
              "\n",
              "    [theme=dark] .colab-df-convert {\n",
              "      background-color: #3B4455;\n",
              "      fill: #D2E3FC;\n",
              "    }\n",
              "\n",
              "    [theme=dark] .colab-df-convert:hover {\n",
              "      background-color: #434B5C;\n",
              "      box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
              "      filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
              "      fill: #FFFFFF;\n",
              "    }\n",
              "  </style>\n",
              "\n",
              "    <script>\n",
              "      const buttonEl =\n",
              "        document.querySelector('#df-1fafef96-2bc1-4048-ad16-76b2b9e23bf7 button.colab-df-convert');\n",
              "      buttonEl.style.display =\n",
              "        google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
              "\n",
              "      async function convertToInteractive(key) {\n",
              "        const element = document.querySelector('#df-1fafef96-2bc1-4048-ad16-76b2b9e23bf7');\n",
              "        const dataTable =\n",
              "          await google.colab.kernel.invokeFunction('convertToInteractive',\n",
              "                                                    [key], {});\n",
              "        if (!dataTable) return;\n",
              "\n",
              "        const docLinkHtml = 'Like what you see? Visit the ' +\n",
              "          '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
              "          + ' to learn more about interactive tables.';\n",
              "        element.innerHTML = '';\n",
              "        dataTable['output_type'] = 'display_data';\n",
              "        await google.colab.output.renderOutput(dataTable, element);\n",
              "        const docLink = document.createElement('div');\n",
              "        docLink.innerHTML = docLinkHtml;\n",
              "        element.appendChild(docLink);\n",
              "      }\n",
              "    </script>\n",
              "  </div>\n",
              "\n",
              "\n",
              "<div id=\"df-32e405b9-713e-40e2-8a2d-0317aa3ee4bc\">\n",
              "  <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-32e405b9-713e-40e2-8a2d-0317aa3ee4bc')\"\n",
              "            title=\"Suggest charts\"\n",
              "            style=\"display:none;\">\n",
              "\n",
              "<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
              "     width=\"24px\">\n",
              "    <g>\n",
              "        <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n",
              "    </g>\n",
              "</svg>\n",
              "  </button>\n",
              "\n",
              "<style>\n",
              "  .colab-df-quickchart {\n",
              "      --bg-color: #E8F0FE;\n",
              "      --fill-color: #1967D2;\n",
              "      --hover-bg-color: #E2EBFA;\n",
              "      --hover-fill-color: #174EA6;\n",
              "      --disabled-fill-color: #AAA;\n",
              "      --disabled-bg-color: #DDD;\n",
              "  }\n",
              "\n",
              "  [theme=dark] .colab-df-quickchart {\n",
              "      --bg-color: #3B4455;\n",
              "      --fill-color: #D2E3FC;\n",
              "      --hover-bg-color: #434B5C;\n",
              "      --hover-fill-color: #FFFFFF;\n",
              "      --disabled-bg-color: #3B4455;\n",
              "      --disabled-fill-color: #666;\n",
              "  }\n",
              "\n",
              "  .colab-df-quickchart {\n",
              "    background-color: var(--bg-color);\n",
              "    border: none;\n",
              "    border-radius: 50%;\n",
              "    cursor: pointer;\n",
              "    display: none;\n",
              "    fill: var(--fill-color);\n",
              "    height: 32px;\n",
              "    padding: 0;\n",
              "    width: 32px;\n",
              "  }\n",
              "\n",
              "  .colab-df-quickchart:hover {\n",
              "    background-color: var(--hover-bg-color);\n",
              "    box-shadow: 0 1px 2px rgba(60, 64, 67, 0.3), 0 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
              "    fill: var(--button-hover-fill-color);\n",
              "  }\n",
              "\n",
              "  .colab-df-quickchart-complete:disabled,\n",
              "  .colab-df-quickchart-complete:disabled:hover {\n",
              "    background-color: var(--disabled-bg-color);\n",
              "    fill: var(--disabled-fill-color);\n",
              "    box-shadow: none;\n",
              "  }\n",
              "\n",
              "  .colab-df-spinner {\n",
              "    border: 2px solid var(--fill-color);\n",
              "    border-color: transparent;\n",
              "    border-bottom-color: var(--fill-color);\n",
              "    animation:\n",
              "      spin 1s steps(1) infinite;\n",
              "  }\n",
              "\n",
              "  @keyframes spin {\n",
              "    0% {\n",
              "      border-color: transparent;\n",
              "      border-bottom-color: var(--fill-color);\n",
              "      border-left-color: var(--fill-color);\n",
              "    }\n",
              "    20% {\n",
              "      border-color: transparent;\n",
              "      border-left-color: var(--fill-color);\n",
              "      border-top-color: var(--fill-color);\n",
              "    }\n",
              "    30% {\n",
              "      border-color: transparent;\n",
              "      border-left-color: var(--fill-color);\n",
              "      border-top-color: var(--fill-color);\n",
              "      border-right-color: var(--fill-color);\n",
              "    }\n",
              "    40% {\n",
              "      border-color: transparent;\n",
              "      border-right-color: var(--fill-color);\n",
              "      border-top-color: var(--fill-color);\n",
              "    }\n",
              "    60% {\n",
              "      border-color: transparent;\n",
              "      border-right-color: var(--fill-color);\n",
              "    }\n",
              "    80% {\n",
              "      border-color: transparent;\n",
              "      border-right-color: var(--fill-color);\n",
              "      border-bottom-color: var(--fill-color);\n",
              "    }\n",
              "    90% {\n",
              "      border-color: transparent;\n",
              "      border-bottom-color: var(--fill-color);\n",
              "    }\n",
              "  }\n",
              "</style>\n",
              "\n",
              "  <script>\n",
              "    async function quickchart(key) {\n",
              "      const quickchartButtonEl =\n",
              "        document.querySelector('#' + key + ' button');\n",
              "      quickchartButtonEl.disabled = true;  // To prevent multiple clicks.\n",
              "      quickchartButtonEl.classList.add('colab-df-spinner');\n",
              "      try {\n",
              "        const charts = await google.colab.kernel.invokeFunction(\n",
              "            'suggestCharts', [key], {});\n",
              "      } catch (error) {\n",
              "        console.error('Error during call to suggestCharts:', error);\n",
              "      }\n",
              "      quickchartButtonEl.classList.remove('colab-df-spinner');\n",
              "      quickchartButtonEl.classList.add('colab-df-quickchart-complete');\n",
              "    }\n",
              "    (() => {\n",
              "      let quickchartButtonEl =\n",
              "        document.querySelector('#df-32e405b9-713e-40e2-8a2d-0317aa3ee4bc button');\n",
              "      quickchartButtonEl.style.display =\n",
              "        google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
              "    })();\n",
              "  </script>\n",
              "</div>\n",
              "    </div>\n",
              "  </div>\n"
            ]
          },
          "metadata": {},
          "execution_count": 31
        }
      ],
      "source": [
        "def calculate_statistics(data):\n",
        "    statistics = {}\n",
        "    for key in ['precision', 'recall', 'f1']:\n",
        "        statistics[key] = np.mean(data[key])\n",
        "    return statistics\n",
        "\n",
        "Bertscore_Answer_HyDE_with_reranker_stats = calculate_statistics(Bertscore_Answer_HyDE_with_reranker)\n",
        "Bertscore_Answer_QA_RAG_stats = calculate_statistics(Bertscore_Answer_QA_RAG)\n",
        "Bertscore_Answer_Only_question_stats = calculate_statistics(Bertscore_Answer_Only_question)\n",
        "Bertscore_Answer_Multiquery_questions_stats = calculate_statistics(Bertscore_Answer_Multiquery_questions)\n",
        "Bertscore_Answer_Only_hypothetical_answer_stats = calculate_statistics(Bertscore_Answer_Only_hypothetical_answer)\n",
        "\n",
        "stats_df = pd.DataFrame({'QA_RAG':Bertscore_Answer_QA_RAG_stats, 'Multiquery_questions':Bertscore_Answer_Multiquery_questions_stats, 'HyDE_with_reranker':Bertscore_Answer_HyDE_with_reranker_stats, 'Only_question':Bertscore_Answer_Only_question_stats, 'Only_hypothetical_answer':Bertscore_Answer_Only_hypothetical_answer_stats})\n",
        "\n",
        "stats_df.transpose()"
      ]
    }
  ],
  "metadata": {
    "accelerator": "GPU",
    "colab": {
      "machine_shape": "hm",
      "provenance": [],
      "gpuType": "A100"
    },
    "kernelspec": {
      "display_name": "Python 3",
      "name": "python3"
    },
    "language_info": {
      "name": "python"
    }
  },
  "nbformat": 4,
  "nbformat_minor": 0
}